# -*- coding: utf-8 -*-
"""
Code to replicate the simulations in The Consequences of Using a Top 5/RCV System in Nevada by Gelman, Pritsos and Reilly
Created: March 5, 2024
Most recent update: September 10, 2024

"""

import pyrankvote
from pyrankvote import Candidate, Ballot
import os
import pandas as pd
import numpy as np
import random
import time

# Load the two survey samples (answers stored as text labels).
dy_text = pd.read_csv('/replication/dynata_text.csv')  # Dynata sample
cv_text = pd.read_csv('/replication/convenience_text.csv')  # convenience sample

# Stack both samples into a single dataframe, discard the rows flagged with
# omit == 1 (click-through responses), and renumber the rows from 0 so that
# the row index is unique again after the concatenation.
data_text = pd.concat([dy_text, cv_text])
not_omitted = data_text['omit'] != 1
data_text = data_text[not_omitted].reset_index().drop(columns=['index'])

#Rename data columns
# The five ranking questions become one column per candidate.
data_text = data_text.rename(columns={
    'generic_general_1': "obama",
    'generic_general_2': 'trump',
    'generic_general_3': 'sanders',
    'generic_general_4': 'johnson',
    'generic_general_5': 'romney',
})

# Convert the text labels to numeric ranks; any answer that is not one of
# these five labels (including a missing answer) becomes NaN.
rank_labels = {'1st choice': 1, '2nd choice': 2, '3rd choice': 3, '4th choice': 4, '5th choice': 5}
for ranked_column in ('obama', 'trump', 'sanders', 'johnson', 'romney'):
    data_text[ranked_column] = data_text[ranked_column].map(rank_labels)

#Create partisanship variable - this preserves independents as non-partisans.
# Each (Q1 answer, indicator column) pair yields a 0/1 flag; the position of
# the pair in this list is also the numeric party_id code assigned below.
party_flags = [
    ('Democrat', 'democrat'),
    ('Republican', 'republican'),
    ('Independent', 'independent'),
    ('Something else', 'third_party'),
]
for q1_answer, flag_column in party_flags:
    data_text[flag_column] = np.where(data_text['Q1'] == q1_answer, 1, 0)

#Single category for party ID
# Rows whose Q1 answer matches none of the four labels keep party_id = NaN.
for party_code, (q1_answer, flag_column) in enumerate(party_flags):
    data_text.loc[data_text[flag_column] == 1, 'party_id'] = party_code

#Create the number of hypothetical partisan splits
# Each split is a [dem, rep, ind] list of percentage points that sums to 95;
# the remaining 5 points are reserved for third-party voters.
all_splits=[]
party_split = ['dem', 'rep', 'ind']
n_splits = 100 #How many partisan splits you want
major_share = 95 #95% voters assumed in splits (5% as third party)
case=0
while case<n_splits:
    # Assign each of the 95 points to one of the three groups uniformly at
    # random, then count how many points each group received.
    split = [random.choice(party_split) for _ in range(major_share)]
    dem = split.count('dem')
    rep = split.count('rep')
    ind = split.count('ind')
    new_split = [dem, rep, ind]

    # Keep only draws inside the allowed ranges for each group.
    in_range = (25 <= dem <= 45) and (25 <= rep <= 45) and (15 <= ind <= 30)
    # Skip draws that are out of range or already stored. Membership must be
    # tested with `in`: comparing one split to the whole list with `==` is
    # never true, which would let duplicate splits through.
    if not in_range or new_split in all_splits:
        continue

    all_splits.append(new_split)
    case=case+1
    print("case", case)
        
#Primary winner
# For every partisan split, draw 500 weighted samples of 316 respondents and
# record the (up to) five most frequent `generic_primary` values of each draw.
# The result, primary_results, has one row per (draw, rank) with columns:
# index (rank 1..5 within the draw), generic_primary, counts, sim, dem, rep, ind.
third=5
sim=1
splits=0
primary_frames = []
for i in all_splits:
    start=time.time()
    dem, rep, ind = i[0], i[1], i[2]
    splits=splits+1
    print(dem, rep, ind, splits)

    # The sampling weights depend only on the split, so set them once per
    # split instead of once per draw. Rows with no party flag keep a NaN weight.
    data_text.loc[data_text['democrat']==1, 'prop']=dem/100
    data_text.loc[data_text['republican']==1, 'prop']=rep/100
    data_text.loc[data_text['independent']==1, 'prop']=ind/100
    data_text.loc[data_text['third_party']==1, 'prop']=third/100

    for count in range(1, 501):
        # random_state=count makes draw number `count` reproducible.
        data2 = data_text.sample(n=316, random_state=count, weights=data_text['prop'])
        generic_primary_counts = data2.groupby(['generic_primary']).size().reset_index(name='counts')

        # Top five by count, then expose the rank (1 = most frequent) as an
        # 'index' column.
        outcome = generic_primary_counts.nlargest(5, ['counts'])
        outcome = outcome.sort_values(by=['counts'], ascending=False)
        outcome.index = np.arange(1, len(outcome)+1)
        outcome = outcome.reset_index()
        outcome['sim']=sim
        outcome['dem']=dem
        outcome['rep']=rep
        outcome['ind']=ind  # independent share (must not be the Republican share)

        primary_frames.append(outcome)

    sim = sim+1
    end = time.time()
    print(end-start)

# A single concat at the end avoids re-copying the accumulated rows on every draw.
primary_results = pd.concat(primary_frames) if primary_frames else pd.DataFrame()


# Number of simulated primaries per Democratic share: the row count is divided
# by 5, i.e. this assumes every draw contributed exactly five rows.
advance2 = (
    primary_results.groupby('dem')['sim']
    .count()
    .reset_index()
    .rename(columns={'sim': "total"})
)
advance2['total'] = advance2['total'].div(5)

# How often each candidate appears among the top five, per Democratic share.
advance3 = (
    primary_results.groupby(['generic_primary', 'dem'])['index']
    .count()
    .reset_index()
)

# Share of draws (per Democratic share) in which the candidate made the top five.
primary_winners = advance3.merge(advance2, how='inner', on='dem')
primary_winners['win_perc'] = primary_winners['index'].div(primary_winners['total'])

# All output files from here on are written relative to /replication/.
os.chdir('/replication/')
primary_winners.to_csv('generic_primary.csv', index=False)

#This is total counts of winning
# NOTE(review): the divisor 5000000 is hard-coded; this script runs
# 100 splits x 500 draws, so confirm the intended scaling.
advance0 = (
    primary_results.groupby('generic_primary')
    .count()
    .reset_index()
    .rename(columns={'sim': "total"})
)
advance0['total'] = advance0['total'].div(5000000)
advance0.to_csv('generic_primary_agg.csv', index=False)

#Run the ranked choice outcomes
# For every partisan split, draw 500 weighted samples of 316 respondents, run
# an instant-runoff election on each sample's candidate rankings, and store
# how many of the 500 elections each candidate won. The result, `results`,
# has one row per split.
candidate_names = ['obama', 'trump', 'sanders', 'johnson', 'romney']
# One Candidate object per name, shared by all ballots and elections.
candidates = {name: Candidate(name) for name in candidate_names}

third=5
sim=1
splits=0
result_rows = []
for i in all_splits:
    dem, rep, ind = i[0], i[1], i[2]
    splits=splits+1
    print(dem, rep, ind, splits)

    # The sampling weights depend only on the split, so set them once per
    # split instead of once per draw. Rows with no party flag keep a NaN weight.
    data_text.loc[data_text['democrat']==1, 'prop']=dem/100
    data_text.loc[data_text['republican']==1, 'prop']=rep/100
    data_text.loc[data_text['independent']==1, 'prop']=ind/100
    data_text.loc[data_text['third_party']==1, 'prop']=third/100

    champs=[]  # winner of each of this split's 500 elections
    for count in range(1, 501):
        # random_state=count makes draw number `count` reproducible.
        data2 = data_text.sample(n=316, random_state=count, weights=data_text['prop'])

        # Long format: one row per (respondent, candidate) holding the rank the
        # respondent gave, ordered by respondent and then by rank. Candidates
        # the respondent left unranked (NaN) are dropped.
        generic = data2[candidate_names].reset_index()
        generic_rcv = pd.melt(generic, id_vars='index', value_vars=candidate_names)
        generic_rcv = generic_rcv.sort_values(by=['index', 'value'])
        generic_rcv = generic_rcv.dropna(subset=['value'])

        # An overvote is a rank that one respondent gave to several candidates.
        overvote = generic_rcv.duplicated(subset=['index', 'value'], keep=False)

        # Build one ballot per respondent, in rank order. Overvoted entries are
        # skipped, but the respondent still gets a (possibly empty) ballot.
        ranked = {}
        for voter, name, is_overvote in zip(generic_rcv['index'], generic_rcv['variable'], overvote):
            picks = ranked.setdefault(voter, [])
            if not is_overvote:
                picks.append(candidates[name])
        ballots = [Ballot(picks) for picks in ranked.values()]

        election_result = pyrankvote.instant_runoff_voting(
            list(candidates.values()), ballots, pick_random_if_blank=False)
        winners = election_result.get_winners()
        champs.append(winners[0].name)

    #Store this split's win counts
    result_rows.append({'sim':sim, 'dem':dem, 'rep':rep, 'ind':ind, 'third':third,
                        'num_obama':champs.count('obama'), 'num_sanders':champs.count('sanders'),
                        'num_romney':champs.count('romney'), 'num_johnson':champs.count('johnson'),
                        'num_trump':champs.count('trump')})
    # Increment after recording so the first split is sim 1.
    sim=sim+1

results = pd.DataFrame(result_rows, columns=['sim', 'dem', 'rep', 'ind', 'third',
                                             'num_obama', 'num_sanders', 'num_romney',
                                             'num_johnson', 'num_trump'])

# Aggregate the ranked-choice tallies by Democratic share and turn the win
# counts into win rates.
results= results.sort_values(by=['dem', 'rep'])

# Number of splits that share each Democratic share (kept in column 'sim').
results2 = results.groupby(['dem'])['sim'].count().to_frame()

# Columns must be selected with a list: selecting several groupby columns
# with a tuple raises in pandas 2.x.
win_columns = ['num_obama', 'num_trump', 'num_romney', 'num_johnson', 'num_sanders']
results_wins = results.groupby(['dem'])[win_columns].sum()

final_winners = pd.merge(results2, results_wins, how='inner', on='dem')

# Each split contributes 500 elections, so the denominator is splits * 500.
elections = final_winners['sim']*500
for name in ['obama', 'trump', 'sanders', 'johnson', 'romney']:
    final_winners[name + '_perc'] = final_winners['num_' + name]/elections
final_winners = final_winners.reset_index()

# Relative path: written to the working directory set by the earlier os.chdir.
final_winners.to_csv('generic_general.csv', index=False)



